*******************************************************************************
**
** Estimate physician flows at the hospital-year level 
** Note: this script works in conjunction with 3b_wtd_transitions.py  
**
** Steps:
** 1. Run this script with 
** 	MAKEDATA_HOSPCLAIMS = 1
**	MAKEDATA_PROVIDER = 1
**	CROSSWALK_PROVIDER_FILE = 1
** 	HOSP_LEVEL_TOTAL = 1 
**	MAKEDATA_TRANSITIONS = 0 
**	MAKE_SHARES = 0
**
** 2. Run wtd_transitions.py (see script for Bash command)
**
**
** 3. Repeat this script, but flip the globals on/off as follows
** 	MAKEDATA_HOSPCLAIMS = 0
**	MAKEDATA_PROVIDER = 0
**	CROSSWALK_PROVIDER_FILE = 0
** 	HOSP_LEVEL_TOTAL = 0
**	MAKEDATA_TRANSITIONS = 1 
**	MAKE_SHARES = 1
**
**
*******************************************************************************



* ---- session setup: close any log left open by an earlier run, reset memory ----
capture log close
clear all

* ---- Medicare sample percentage and claim file types to process ----
local pct = 100
local filetypes "ip op"

* Year in which the revenue center codes move to the separate revenue center
* file, by file type.
* NOTE(review): ftype is not defined at this point of the script (it only
* appears as a loop variable further down), so as written neither condition
* below is true and chyear is left unset here. Confirm where chyear is meant
* to be assigned.
if "`ftype'" == "op" local chyear = 2001
if "`ftype'" == "ip" local chyear = 2002

* ---- years for which transitions are computed ----
* These are not the raw-data years: transitions for year y use data from y-1.
local startyr_transitions 2003
local endyr_transitions 2014

* ---- file paths ----
* raw Medicare claim files for the chosen sample percentage
local medicare_filepath "/disk/aging/medicare/data/`pct'pct"

* working directory for this physician-flows build
local fpath_physicianflows "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/physician_flows"

* NPPES build output (used below to filter NPIs)
local fpath_nppes "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/build/nppes/output"

* main build output (hospital-year analytic file)
local fpath_build_main "/disk/agedisk4/medicare.work/sacarny-DUA51934/shruthi-dua51934/replication_files/build/output"

* everything from here on is logged
log using "`fpath_physicianflows'/prepare.log", replace

* names of the share variables to be calculated
local categories = "entrants_sh exits_sh stayers_sh"
local wtd_cat = "wpat_entrants wpat_exits wpat_stayers wcost_entrants wcost_exits wcost_stayers"


***** GLOBALS: run switches (1 = run the section, 0 = skip it)

* Raw-data year range. This is NOT the range of years for which transitions
* are computed; that range is set by the transition-year locals.
global STARTYEAR 2002
global ENDYEAR 2014

* Step: compile hospital claims from the 100% IP and OP files
global MAKEDATA_HOSPCLAIMS 0

* Step: collapse the hospital claims to a hospital-physician-year file with
* totals for charges and beneficiaries
global MAKEDATA_PROVIDER 0

* Step: crosswalk the provider level file to the synthetic ID
global CROSSWALK_PROVIDER_FILE 0

* Step: collapse the hospital-physician-year file to a hospital-year file with
* totals for charges, number of beneficiaries, and number of physicians.
* These totals are the denominators for the share calculations.
global HOSP_LEVEL_TOTAL 0

* Step: append together the transition files created in wtd_transitions.py
* NOTE: only run this AFTER wtd_transitions.py has been run
global MAKEDATA_TRANSITIONS 1

* Step: make the hospital-year level file with shares
* (pulls info from the main hospital-year analytic file)
global MAKE_SHARES 1

* Option: balance the panel of hospitals when making the samples
* NOTE: only has an effect when MAKE_SHARES is 1
global BALANCEPANEL 0


* Build one claim-level extract per file type and year, for the raw-data years
* STARTYEAR through ENDYEAR, holding the attending physician IDs (NPI, UPIN),
* the hospital provider number, the beneficiary ID and total charges.
*
* OP file: ER claims only
* IP file: all claims
*
if $MAKEDATA_HOSPCLAIMS == 1 {

	foreach ftype in `filetypes' {

		* First year in which the revenue center codes sit in the separate
		* revenue center file for this file type. This has to be assigned inside
		* the loop because it depends on the loop variable; an unknown file type
		* stops the run instead of silently using an empty cutoff.
		if "`ftype'" == "op" {
			local chyear = 2001
		}
		else if "`ftype'" == "ip" {
			local chyear = 2002
		}
		else {
			display as error "no revenue center changeover year defined for file type `ftype'"
			exit 198
		}

		forvalues year = $STARTYEAR(1)$ENDYEAR {

			di "****Processing data for the `ftype' file in `year'****"

			* name of the beneficiary identifier in the raw files:
			* ehic through 2005, bene_id from 2006 on
			if `year' <= 2005 {
				local bene_id "ehic"
			}
			else {
				local bene_id "bene_id"
			}

			* revenue center files have revenue center codes required to identify hospital claims
			* years before chyear have revenue center info rolled up into IP/OP files
			* so only read the IP/OP claims file (no need to merge the revenue center file)
			if (`year' < `chyear') {

				* read in the line file
				use `bene_id' at_npi at_upin provider rvcntr* tot_chrg using "`medicare_filepath'/`ftype'/`year'/`ftype'`year'.dta", clear

				* from outpatient file: keep only the er claims using the revenue center codes
				if "`ftype'" == "op" {
					quietly desc rvcntr*, varlist
					* store the list right away so later commands cannot overwrite r()
					local rvvars `r(varlist)'
					gen er = 0
					foreach v of local rvvars {
						replace er = er + inlist(`v', "0450", "0451", "0452", "0456", "0459", "0981")
					}
					keep if er > 0
				}
			}
			else {

				* read in the claims file; the claim key is claimindex before 2006
				* and clm_id from 2006 on
				if `year' < 2006 {
					use `bene_id' claimindex at_npi at_upin provider tot_chrg using "`medicare_filepath'/`ftype'/`year'/`ftype'c`year'.dta" , clear
				}
				else {
					use `bene_id' clm_id at_npi at_upin provider tot_chrg using "`medicare_filepath'/`ftype'/`year'/`ftype'c`year'.dta" , clear
				}

				* merge to the revenue center file to select outpatient ER claims
				if "`ftype'" == "op" {
					* the outpatient revenue center file key changes in 2006: before 2006
					* it is claimindex, from 2006 on it is the beneficiary ID plus clm_id
					if `year' < 2006 {
						merge 1:m claimindex using "`medicare_filepath'/`ftype'/`year'/`ftype'r`year'.dta", keepusing(rev_cntr) nogen
					}
					else {
						merge 1:m `bene_id' clm_id using "`medicare_filepath'/`ftype'/`year'/`ftype'r`year'.dta", keepusing(rev_cntr) nogen
					}

					* keep only the ER revenue center codes
					keep if inlist(rev_cntr, "0450", "0451", "0452", "0456", "0459", "0981")
				}
			}

			* harmonize the beneficiary identifier name across years
			* (nothing to do when the raw variable is already called bene_id)
			if "`bene_id'" != "bene_id" {
				rename `bene_id' bene_id
			}

			* reduce to distinct patient-physician-hospital-charge rows
			keep bene_id at_npi at_upin provider tot_chrg
			duplicates drop

			* record the year
			gen year = `year'

			* save under input, which is where the provider-level step reads
			* these yearly extracts from
			save "`fpath_physicianflows'/input/`ftype'/hospclaims_`ftype'`pct'_`year'.dta", replace
		}
	}
}

* Build the hospital-physician-year file. For each file type the yearly claim
* extracts are given a single physician identifier (docid) and stacked; the IP
* and OP stacks are then combined and totalled by year, hospital and physician.
if ($MAKEDATA_PROVIDER == 1) {
	foreach filetype in `filetypes' {
		forvalues y = $STARTYEAR(1)$ENDYEAR {

			di "**** Processing data for `y' ****"

			* yearly claim extract for this file type
			use "`fpath_physicianflows'/input/`filetype'/hospclaims_`filetype'`pct'_`y'", clear
			rename at_upin upin
			rename at_npi npi

			* a row must carry at least one of the two physician identifiers
			keep if !missing(upin) | !missing(npi)

			* physician ID: UPIN for 2002-2007, NPI for 2008-2014.
			* For any other year docid stays empty and the row is dropped below.
			gen docid = ""
			label var docid "=upin for 2002-2007, =npi for 2008-2014"
			if inrange(`y', 2002, 2007) {
				* keep UPINs whose first character is a letter A through M
				* (described in the original notes as the individual UPINs)
				keep if regexm(upin,"^[A-M]")
				replace docid = upin
			}
			else if inrange(`y', 2008, 2014) {
				* keep only NPIs found in the NPPES file
				* (described in the original notes as the individual NPIs)
				merge m:1 npi using "`fpath_nppes'/nppes_npi", keep(match) nogen
				replace docid = npi
			}

			* discard rows with an empty or all-zero physician ID
			drop if missing(docid) | docid == "0000000000"

			* the hospital provider number must always be present
			assert !missing(provider)

			* keep only the provider, docid, charges and bene id (plus year)
			keep provider year docid tot_chrg bene_id

			* park this year in a tempfile
			tempfile filled`y'
			save `filled`y''

		}

		* stack all years for this file type
		clear
		forvalues y = $STARTYEAR(1)$ENDYEAR {
			append using `filled`y''
		}
		save "`fpath_physicianflows'/input/combined/`filetype'_xwalked_altid`pct'", replace
	}

	* combine the inpatient claims with the outpatient ER claims
	use "`fpath_physicianflows'/input/combined/ip_xwalked_altid`pct'", clear
	append using "`fpath_physicianflows'/input/combined/op_xwalked_altid`pct'"

	* unique patients per physician-hospital-year: flag the first row of each
	* year-hospital-physician-patient group, summed in the collapse below
	bysort year provider docid bene_id: gen npatphys = (_n == 1)

	* unique patients per hospital-year: flag the first row of each
	* year-hospital-patient group and total the flags within hospital-year
	bysort year provider bene_id: gen first_in_hosp = (_n == 1)
	bysort year provider: egen npathosp = total(first_in_hosp)
	drop first_in_hosp

	* collapse to the year-hospital-physician level
	collapse (sum) totchrg_phys=tot_chrg npatphys (mean) npathosp, by(year provider docid) fast
	rename provider pn
	sort year docid pn

	* save physician level statistics
	save "`fpath_physicianflows'/input/combined/providers_altid_unrestricted_pct`pct'", replace
}

* Attach the synthetic hospital ID (id) from the main analytic file to the
* hospital-physician-year statistics, keeping only matched provider numbers.
if ($CROSSWALK_PROVIDER_FILE == 1) {

	* provider numbers (pn) and hospital attributes from the main analytic file
	use pn year id acq_legacy target target2 acq_other forprofit using "`fpath_build_main'/acq_cleaned_complete_20230606", clear

	* create 2002 rows by copying each hospital's 2003 row
	expand 2 if year == 2003, generate(synth)
	replace year = 2002 if synth
	drop synth
	duplicates drop

	* stop here unless pn-year uniquely identifies the rows
	isid pn year

	* merge to the hospital-physician-year level file,
	* keeping only the PNs that show up in the main analytic file
	merge 1:m pn year using "`fpath_physicianflows'/input/combined/providers_altid_unrestricted_pct100.dta", keep(3) generate(main_pn_match)

	save "`fpath_physicianflows'/input/combined/providers_altid_match_pct100_20230606.dta", replace

	* the same data as CSV, for use in wtd_transitions.py
	export delimited using "`fpath_physicianflows'/input/combined/providers_altid_match_pct100_20230606.csv", replace
	* (an earlier version saved this file without the date suffix:
	*  providers_altid_match_pct100.dta)
}


* Hospital-year totals: physician charges, number of physician rows, and number
* of beneficiaries, merged with acquirer and other hospital information.
if ($HOSP_LEVEL_TOTAL == 1) {

	* acquirer and other hospital information from the main analytic file,
	* with 2002 rows created by copying each hospital's 2003 row
	use id year nonacq* acq_legacy target target2 acq_other ind* hospbd lhospbd hrrcode forprofit using "`fpath_build_main'/acq_cleaned_complete_20230606", clear
	expand 2 if year == 2003, generate(synth)
	replace year = 2002 if synth
	drop synth
	duplicates drop
	tempfile hospinfo
	save `hospinfo'

	* hospital-physician-year statistics restricted to the analytic hospitals
	use "`fpath_physicianflows'/input/combined/providers_altid_match_pct100_20230606.dta", clear

	* collapse to the id-year level; summing a column of ones counts the
	* hospital-physician rows in each id-year
	gen lines = 1
	collapse (sum) totchrg_hosp=totchrg_phys nphyshosp=lines (mean) npathosp, by(id year) fast

	* attach the hospital information; hosplevel_merge records the match status
	merge 1:1 id year using `hospinfo', generate(hosplevel_merge)

	* (an earlier version saved to input/combined/hosplevel_altid_unrestricted_pct100)
	save "`fpath_physicianflows'/input/combined/hosplevel_altid_unrestricted_pct100_`startyr_transitions'_`endyr_transitions'_20230606", replace
}


**********
**********
********** IMPORTANT: BEFORE PROCEEDING TO THE NEXT STEP,
********** NEED TO RUN WTD_TRANSITIONS.PY
********** The following steps use the hospital level output from that file.
**********
**********
* Append together the yearly transition files created in wtd_transitions.py
* into one file covering the transition years.
if $MAKEDATA_TRANSITIONS == 1 {
	forvalues y = `startyr_transitions'(1)`endyr_transitions' {
		di "the year is `y'"
		insheet using "`fpath_physicianflows'/input/transitions/transitions_altid_wtd`y'pct100_20230606.csv" , clear

		* 2002 special case: stayers and exits are forced to zero
		* (presumably because there is no earlier raw-data year to compare with).
		* Works whether the columns were read as string or as numeric.
		if `y' == 2002 {
			foreach v in stayers exits {
				capture confirm string variable `v'
				if _rc == 0 {
					replace `v' = "0"
				}
				else {
					replace `v' = 0
				}
			}
		}

		* A count column holding the text False is read in as a string, which
		* would break the append below. Repair any count column that arrives as
		* a string, in any year, rather than only stayers in 2008.
		foreach v in entrants stayers exits {
			capture confirm string variable `v'
			if _rc == 0 {
				di "Destringing `v'"
				replace `v' = "0" if `v' == "False"
				destring `v' , replace
				* stop right here if other non-numeric text is still present
				confirm numeric variable `v'
			}
		}

		tempfile temp`y'
		save `temp`y'', replace
	}

	* stack the yearly files
	clear
	forvalues y = `startyr_transitions'(1)`endyr_transitions' {
		append using `temp`y''
	}

	* rows without a hospital id cannot be merged later
	drop if missing(id)
	save "`fpath_physicianflows'/input/transitions/transitions_wtd`startyr_transitions'_`endyr_transitions'_pct`pct'_20230606", replace
}



* make entrant, exit, and churn shares
* unweighted, patient-weighted and charges weighted
if $MAKE_SHARES == 1 {
	* bring in the appended transitions file
	use "`fpath_physicianflows'/input/transitions/transitions_wtd`startyr_transitions'_`endyr_transitions'_pct`pct'_20230606", clear

	* merge with the hospital denominator file.
	* The file name includes the _20230606 suffix because that is the name the
	* HOSP_LEVEL_TOTAL step saves under; without the suffix this merge points at
	* a file that this script never writes.
	merge 1:1 id year using "`fpath_physicianflows'/input/combined/hosplevel_altid_unrestricted_pct100_`startyr_transitions'_`endyr_transitions'_20230606", gen(transitions_merge)

	* keep matched rows, plus all 2002 rows so that the lagged physician count
	* used below is available for 2003
	keep if transitions_merge == 3 | year == 2002

	* generate the unweighted entrant/ exit/ stayer share variables.
	* The denominator is the average of this year's and last year's physician
	* count, so a share is missing whenever the previous year is absent.
	egen id2 = group(id)
	tsset id2 year
	gen entrants_sh = entrants / (.5*(nphyshosp + L.nphyshosp))
	gen stayers_sh = stayers / (.5*(nphyshosp + L.nphyshosp))
	gen exits_sh = exits / (.5*(nphyshosp + L.nphyshosp))

	* churn measure: entrants + exits
	* NOTE(review): the wpat_ and wcost_ names below may be relying on Stata
	* variable-name abbreviation (the variable list near the top of the script
	* spells them with a trailing s) - confirm against the transitions file.
	gen churn_sh = (entrants + exits) / (.5*(nphyshosp + L.nphyshosp))
	gen wpat_churn = wpat_entrant + wpat_exit
	gen wcost_churn = wcost_entrant + wcost_exit

	* labels for chart title
	label var wpat_entrant "Patient-weighted entrant shares"
	label var wpat_exit "Patient-weighted exit shares"
	label var wpat_stayer "Patient-weighted stayer shares"
	label var wpat_churn "Patient-weighted entrant + exit shares "
	label var wcost_entrant "Charges-weighted entrant shares"
	label var wcost_exit "Charges-weighted exit shares"
	label var wcost_stayer "Charges-weighted stayer shares"
	label var wcost_churn "Charges-weighted entrant + exit shares"

	label var entrants_sh "Entrant shares"
	label var stayers_sh "Stayer shares"
	label var exits_sh "Exit shares"
	label var churn_sh "Churn shares"

	* clean up: drop the 2002 helper rows and anything past 2014, and keep only
	* hospital-years found in both the claims totals and the main analytic file
	drop if year < 2003 | year > 2014
	keep if hosplevel_merge == 3
	drop *_merge

	* balance panel of hospitals: keep only hospitals observed in every year
	* from 2004 through 2014 (their 2003 rows, if any, are kept as well)
	if ($BALANCEPANEL == 1) {
		preserve
		tempfile tmp
		keep id year
		drop if year < 2004 | year > 2014
		bys id: gen t = _N
		drop if t != 2014 - 2004 + 1
		duplicates drop id, force
		keep id
		save `tmp', replace
		restore
		merge m:1 id using `tmp',  keep(match) nogen
	}

	save "`fpath_physicianflows'/output/physician_flows_data`startyr_transitions'_`endyr_transitions'_20230606.dta", replace
}


log close






